View Javadoc

1   
2   /*
3    * SmartCrawler
4    *
5    * $Id: Link.java,v 1.8 2005/08/05 15:55:53 vincool Exp $
6    * Copyright 2005 Davide Pozza
7    *
8    * This program is free software; you can redistribute it
9    * and/or modify it under the terms of the GNU General Public
10   * License as published by the Free Software Foundation;
11   * either version 2 of the License, or (at your option) any
12   * later version.
13   *
14   * This program is distributed in the hope that it will be
15   * useful, but WITHOUT ANY WARRANTY; without even the implied
16   * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
17   * PURPOSE. See the GNU General Public License for more
18   * details.
19   *
20   * You should have received a copy of the GNU General Public
21   * License along with this program; if not, write to the Free
22   * Software Foundation, Inc., 59 Temple Place, Suite 330,
23   * Boston, MA 02111-1307 USA
24   *
25   */
26  
27  package org.smartcrawler.common;
28  
29  import java.net.MalformedURLException;
30  import java.net.URL;
31  import org.apache.log4j.Logger;
32  
33  
34  /***
35   *  This object represents a simple html link.
36   *
37   * @author <a href="mailto:pozzad@alice.it">Davide Pozza</a>
38   * @version <tt>$Revision: 1.8 $</tt>
39   */
40  public class Link {
41  
42      private static Logger log = SCLogger.getLogger(Link.class);
43  
44      private String urlStr;
45      private URL url;
46  
47      /***
48       *  Constructor.
49       *
50       * @param urlStr The link string
51       * @throws org.smartcrawler.common.MalformedLinkException
52       *
53       */
54      public Link(String str) throws MalformedLinkException {
55          //the supplied link must be "well formed" and must be a valid URL
56          if (!str.toLowerCase().startsWith("http://")) {
57              str = "http://" + str;
58          }
59          try {
60              this.url = new URL(str);
61          } catch (MalformedURLException e) {
62              throw new MalformedLinkException("The link " + str
63                      + " does not represents a valid URL");
64          }
65          //this.urlStr = str;
66          this.urlStr = this.url.getProtocol() + "://"
67                  + this.url.getHost()
68                  + (this.url.getPath() == null ? "" : this.url.getPath())
69                  + (this.url.getQuery() == null ? "" : "?" + this.url.getQuery());
70  
71      }
72  
73      /***
74       *
75       * @return
76       */
77      public URL getURL() {
78          return this.url;
79      }
80  
81      /***
82       *
83       * @return
84       */
85      public String toString() {
86          return urlStr;
87      }
88  
89      /***
90       *
91       * @param includeFile
92       * @return
93       */
94      public String getPath(boolean includeFile) {
95          String res = this.url.getPath();
96          if (!includeFile) {
97              int idx = res.lastIndexOf("/");//last part
98              if (idx >= 0 ) {
99                  String tmp = res.substring(idx);
100                 if (tmp.indexOf(".") > 0) { //check if is a file by extension
101                     res = res.substring(0, idx);
102                 }
103             }
104             if (res.endsWith("/")) {
105                 res = res.substring(0, res.length() - 1);
106             }
107         }
108         log.debug("getPath: urlStr=" + urlStr + " res=" + res);
109         return res;
110     }
111 
112     /***
113      *
114      * @return
115      */
116     public String getHost() {
117         return this.url.getHost();
118     }
119 
120     /***
121      *
122      * @param objLink
123      * @return
124      */
125     public boolean equals(Object objLink) {
126         if (this == objLink) {
127             return true;
128         }
129         if(objLink instanceof Link) {
130             Link link = (Link)objLink;
131             return this.toString().equals(link.toString());
132         } else {
133             return false;
134         }
135     }
136 
137     /***
138      *
139      * @return
140      */
141     public synchronized int hashCode() {
142         return this.toString().hashCode();
143     }
144 }